Load Required Libraries

##Load libraries
library(dplyr) #all data
library(tidyr) #all data
library(readxl)#Import data 2
library(ggplot2)#all data
library(plotrix) #data1
library(plotly) #data 2 https://plot.ly/ggplot2/animations/
library(gapminder) #data2 https://plot.ly/ggplot2/animations/
Data1 posted by Michael Silva (Adult Arrests)

Get Data 1

# CSV download of the adult-arrests dataset hosted on data.ny.gov.
theurl <- "https://data.ny.gov/api/views/rikd-mt35/rows.csv?accessType=DOWNLOAD"

# Parse the comma-separated file; its first row supplies the column names.
thedata <- read.table(theurl, sep = ",", header = TRUE)

# Compact overview of every column and its type.
thedata %>% glimpse()
## Observations: 3,118
## Variables: 13
## $ County            <fct> Albany, Allegany, Bronx, Broome, Cattaraugus...
## $ Year              <int> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20...
## $ Total             <int> 7115, 827, 47420, 5076, 1688, 1304, 3433, 20...
## $ Felony.Total      <int> 2547, 256, 15736, 1424, 559, 361, 1021, 548,...
## $ Drug.Felony       <int> 524, 46, 3121, 271, 174, 40, 159, 121, 64, 1...
## $ Violent.Felony    <int> 527, 66, 6009, 313, 88, 81, 202, 133, 52, 12...
## $ DWI.Felony        <int> 137, 32, 141, 71, 58, 25, 71, 47, 25, 57, 37...
## $ Other.Felony      <int> 1359, 112, 6465, 769, 239, 215, 589, 247, 13...
## $ Misdemeanor.Total <int> 4568, 571, 31684, 3652, 1129, 943, 2412, 147...
## $ Drug.Misd         <int> 843, 49, 6177, 782, 149, 82, 481, 297, 78, 1...
## $ DWI.Misd          <int> 807, 121, 954, 422, 267, 166, 409, 231, 120,...
## $ Property.Misd     <int> 1536, 123, 7786, 1271, 333, 320, 663, 496, 1...
## $ Other.Misd        <int> 1382, 278, 16767, 1177, 380, 375, 859, 455, ...

Tidy Data Offense

Gather all offense totals and display GGPlots

# Preview the first six rows of the arrests table.
head(thedata, n = 6L)
##        County Year Total Felony.Total Drug.Felony Violent.Felony
## 1      Albany 2018  7115         2547         524            527
## 2    Allegany 2018   827          256          46             66
## 3       Bronx 2018 47420        15736        3121           6009
## 4      Broome 2018  5076         1424         271            313
## 5 Cattaraugus 2018  1688          559         174             88
## 6      Cayuga 2018  1304          361          40             81
##   DWI.Felony Other.Felony Misdemeanor.Total Drug.Misd DWI.Misd
## 1        137         1359              4568       843      807
## 2         32          112               571        49      121
## 3        141         6465             31684      6177      954
## 4         71          769              3652       782      422
## 5         58          239              1129       149      267
## 6         25          215               943        82      166
##   Property.Misd Other.Misd
## 1          1536       1382
## 2           123        278
## 3          7786      16767
## 4          1271       1177
## 5           333        380
## 6           320        375
# Total arrests per offense category, summed over every county and year in
# the download, plus each category's share of all arrests.
tddata1.1 <- thedata %>%
  # Keep only County and the eight detail offense columns; the roll-up
  # columns and Year are not wanted in the long table.
  select(-Felony.Total, -Misdemeanor.Total, -Year, -Total) %>%
  # Long format: one row per source row and offense column. Every column
  # except County is an offense count, so select by name instead of by the
  # fragile positions 2:9.
  gather(offense, value, -County) %>%
  group_by(offense) %>%
  # summarise_each()/funs() are deprecated in dplyr; a plain summarise()
  # produces the same per-offense total.
  summarise(value = sum(value)) %>%
  # Percentage of all arrests; the denominator is the sum of the Total
  # column of the raw table.
  mutate(pctallcrime = round(value / sum(thedata$Total) * 100, 5)) %>%
  arrange(desc(value))
tddata1.1
## # A tibble: 8 x 3
##   offense          value pctallcrime
##   <chr>            <int>       <dbl>
## 1 Other.Misd     5045892      22.5  
## 2 Property.Misd  4739379      21.1  
## 3 Other.Felony   3424829      15.2  
## 4 Drug.Misd      2915865      13.0  
## 5 Violent.Felony 2505386      11.2  
## 6 DWI.Misd       1955704       8.71 
## 7 Drug.Felony    1670570       7.44 
## 8 DWI.Felony      203720       0.907

GGPlot

# Penalise scientific notation so large values print in fixed notation.
options(scipen = 20)

# Slice labels of the form "<offense> <rounded share>%".
lbls <- paste0(tddata1.1$offense, " ", round(tddata1.1$pctallcrime, 0), "%")

# One slice per offense, sized by its arrest total.
pie(
  tddata1.1$value,
  labels = lbls,
  col = rainbow(nrow(tddata1.1)),
  main = "Pie Chart of Offenses"
)

# Offense totals plotted against their share of all arrests; point size
# follows the share and colour identifies the offense.
ggplot(tddata1.1, aes(x = pctallcrime, y = value)) +
  geom_point(aes(size = pctallcrime, color = offense))

# Bar chart of total arrests per offense. The x-axis tick labels are blanked;
# the fill legend carries the offense names instead.
# (Alternative kept from the original draft: rotate the labels with
#  theme(axis.text.x = element_text(angle = -90)).)
ggplot(tddata1.1, aes(x = offense, y = value, fill = offense)) +
  geom_col(position = "identity") +
  theme(axis.text.x = element_blank())

Tidy Data by Top 10 Counties

Gather all offense totals and display GGplots by Counties

# Total arrests per county (all offenses and years combined) with each
# county's share of all arrests, limited to the ten largest counties.
tddata1.2 <- thedata %>%
  # Keep only County and the eight detail offense columns.
  select(-Felony.Total, -Misdemeanor.Total, -Year, -Total) %>%
  # Long format; select the offense columns by name rather than by the
  # fragile positions 2:9.
  gather(offense, value, -County) %>%
  group_by(County) %>%
  # summarise_each()/funs() are deprecated in dplyr; a plain summarise()
  # produces the same per-county total.
  summarise(value = sum(value)) %>%
  # Percentage of all arrests in the raw table.
  mutate(pctallcrime = round(value / sum(thedata$Total) * 100, 5)) %>%
  arrange(desc(value)) %>%
  # Ten counties with the highest totals (ties, if any, are all kept).
  top_n(10, value)
tddata1.2
## # A tibble: 10 x 3
##    County        value pctallcrime
##    <fct>         <int>       <dbl>
##  1 New York    4155471       18.5 
##  2 Kings       3345798       14.9 
##  3 Bronx       2634506       11.7 
##  4 Queens      1922751        8.56
##  5 Erie        1121703        4.99
##  6 Suffolk     1050851        4.68
##  7 Nassau       834180        3.71
##  8 Monroe       780495        3.47
##  9 Westchester  718231        3.20
## 10 Onondaga     503772        2.24

GGPlot

# Slice labels of the form "<county> <rounded share>%".
# The share must come from the county table (tddata1.2). Taking it from the
# eight-row offense table (tddata1.1) recycled offense percentages across the
# ten counties and labelled every slice with an unrelated number.
lbls <- paste0(tddata1.2$County, " ", round(tddata1.2$pctallcrime, 0), "%")

# One slice per county, sized by its arrest total.
pie(
  tddata1.2$value,
  labels = lbls,
  col = rainbow(length(tddata1.2$value)),
  main = "Pie Chart by Top 10 County"
)

# County totals plotted against their share of all arrests; point size
# follows the share and colour identifies the county.
ggplot(tddata1.2, aes(x = pctallcrime, y = value)) +
  geom_point(aes(size = pctallcrime, color = County))

# Total arrests for every county/offense pair (all years combined) with the
# pair's share of all arrests.
tddata1.3 <- thedata %>%
  # Keep only County and the eight detail offense columns.
  select(-Felony.Total, -Misdemeanor.Total, -Year, -Total) %>%
  # Long format; select the offense columns by name rather than by the
  # fragile positions 2:9.
  gather(offense, value, -County) %>%
  group_by(County, offense) %>%
  # summarise_each()/funs() are deprecated in dplyr; a plain summarise()
  # produces the same totals.
  summarise(value = sum(value)) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # County grouping so later verbs operate on the whole table.
  ungroup() %>%
  # Percentage of all arrests in the raw table.
  mutate(pctallcrime = round(value / sum(thedata$Total) * 100, 5)) %>%
  arrange(desc(value))

# Arrests per county, broken down by offense.
# Each county has one row per offense, so the bars must be stacked: with
# position = "identity" all eight bars of a county start at zero and are
# drawn on top of each other, hiding most of the breakdown.
ggplot(tddata1.3, aes(x = County, y = value, fill = offense)) +
  geom_col(position = "stack") +
  # Rotate and shrink the county names so they fit along the axis.
  theme(axis.text.x = element_text(angle = -90, size = 9))

Data2 posted by Juanelle Marks (Population Migration)

http://www.un.org/en/development/desa/population/migration/data/estimates2/estimates17.shtml

Get Data 2

# UN migration-flow totals workbook: remote location and local file name.
theurl2 <- "http://www.un.org/en/development/desa/population/migration/data/empirical2/data/UN_MigFlow_Totals.xlsx"
destfile <- "UN_MigFlow_Totals.xlsx"

# Fetch the workbook in binary mode so the .xlsx is written unmodified.
download.file(url = theurl2, destfile = destfile, mode = "wb")

# Skip the 16 rows above the table; the next row supplies the column names.
thedata2 <- read_xlsx(destfile, skip = 16)

# Compact overview of every column and its type.
thedata2 %>% glimpse()
## Observations: 229
## Variables: 38
## $ CntName  <chr> "Armenia", "Armenia", "Australia", "Australia", "Aust...
## $ Criteria <chr> "Residence", "Residence", "Residence", "Residence", "...
## $ Type     <chr> "Emigrants", "Immigrants", "Emigrants", "Immigrants",...
## $ Coverage <chr> "Both", "Both", "Both", "Both", "Citizens", "Foreigne...
## $ `1980`   <chr> "..", "..", "90860", "184290", "..", "..", "..", ".."...
## $ `1981`   <chr> "..", "..", "85600", "212690", "..", "..", "..", ".."...
## $ `1982`   <chr> "..", "..", "92340", "195200", "..", "..", "..", ".."...
## $ `1983`   <chr> "..", "..", "100510", "153570", "..", "..", "..", ".....
## $ `1984`   <chr> "..", "..", "96360", "153530", "..", "..", "..", ".."...
## $ `1985`   <chr> "..", "..", "93440", "172550", "..", "..", "..", ".."...
## $ `1986`   <chr> "..", "..", "92450", "196690", "..", "..", "..", ".."...
## $ `1987`   <chr> "..", "..", "97770", "221620", "..", "..", "..", ".."...
## $ `1988`   <chr> "..", "..", "104770", "253860", "..", "..", "..", ".....
## $ `1989`   <chr> "..", "..", "120040", "238050", "..", "..", "..", ".....
## $ `1990`   <chr> "..", "..", "137470", "234050", "..", "..", "..", ".....
## $ `1991`   <chr> "..", "..", "143710", "237240", "..", "..", "..", ".....
## $ `1992`   <chr> "..", "..", "143660", "220460", "..", "..", "..", ".....
## $ `1993`   <chr> "..", "..", "140420", "197940", "..", "..", "..", ".....
## $ `1994`   <chr> "..", "..", "141680", "221920", "..", "..", "..", ".....
## $ `1995`   <chr> "..", "..", "149360", "253940", "..", "..", "..", ".....
## $ `1996`   <chr> "..", "..", "158260", "261330", "17136", "46725", "12...
## $ `1997`   <chr> "..", "..", "176560", "260220", "18830", "48264", "13...
## $ `1998`   <chr> "..", "..", "179600", "268390", "19407", "44865", "13...
## $ `1999`   <chr> "..", "..", "185670", "289870", "19644", "47279", "14...
## $ `2000`   <chr> "12030", "1767", "206120", "317560", "18224", "46248"...
## $ `2001`   <chr> "11901", "1764", "216130", "356410", "21644", "51010"...
## $ `2002`   <chr> "10433", "1715", "222940", "361990", "30353", "44478"...
## $ `2003`   <chr> "8482", "1926", "224890", "388450", "23056", "48940",...
## $ `2004`   <chr> "8451", "1514", "212200", "350990", "21703", "50018",...
## $ `2005`   <chr> "9303", "1497", "206690", "363470", "20333", "49800",...
## $ `2006`   <chr> "8053", "1335", "204800", "402210", "19387", "55045",...
## $ `2007`   <chr> "7461", "1112", "216580", "460650", "17828", "32070",...
## $ `2008`   <chr> "6121", "864", "220280", "535970", "18168", "33395", ...
## $ `2009`   <chr> "4100", "861", "..", "..", "16376", "36868", "8988", ...
## $ `2010`   <chr> "..", "..", "..", "..", "16059", "35592", "8817", "62...
## $ `2011`   <chr> "..", "..", "..", "..", "14401", "36796", "8082", "74...
## $ `2012`   <chr> "..", "..", "..", "..", "15443", "36369", "8272", "83...
## $ `2013`   <chr> "..", "..", "..", "..", "15368", "38703", "9237", "92...

Tidy Data by Year Totals

Gather all year totals and display dynamic year to year changes in plotly

# Preview the first six rows of the migration table.
head(thedata2, n = 6L)
## # A tibble: 6 x 38
##   CntName Criteria Type  Coverage `1980` `1981` `1982` `1983` `1984` `1985`
##   <chr>   <chr>    <chr> <chr>    <chr>  <chr>  <chr>  <chr>  <chr>  <chr> 
## 1 Armenia Residen~ Emig~ Both     ..     ..     ..     ..     ..     ..    
## 2 Armenia Residen~ Immi~ Both     ..     ..     ..     ..     ..     ..    
## 3 Austra~ Residen~ Emig~ Both     90860  85600  92340  100510 96360  93440 
## 4 Austra~ Residen~ Immi~ Both     184290 212690 195200 153570 153530 172550
## 5 Austria Citizen~ Emig~ Citizens ..     ..     ..     ..     ..     ..    
## 6 Austria Citizen~ Emig~ Foreign~ ..     ..     ..     ..     ..     ..    
## # ... with 28 more variables: `1986` <chr>, `1987` <chr>, `1988` <chr>,
## #   `1989` <chr>, `1990` <chr>, `1991` <chr>, `1992` <chr>, `1993` <chr>,
## #   `1994` <chr>, `1995` <chr>, `1996` <chr>, `1997` <chr>, `1998` <chr>,
## #   `1999` <chr>, `2000` <chr>, `2001` <chr>, `2002` <chr>, `2003` <chr>,
## #   `2004` <chr>, `2005` <chr>, `2006` <chr>, `2007` <chr>, `2008` <chr>,
## #   `2009` <chr>, `2010` <chr>, `2011` <chr>, `2012` <chr>, `2013` <chr>
# Total recorded migration per country and year.
tddata2 <- thedata2 %>%
  # Long format: one row per source row and year column (1980 to 2013).
  gather(year, total, "1980":"2013") %>%
  # The workbook uses ".." where no figure is available; drop those cells
  # before converting the remaining text to integers.
  filter(total != "..") %>%
  mutate(total = as.integer(total)) %>%
  group_by(CntName, year) %>%
  # summarise_each()/funs() are deprecated in dplyr; a plain summarise()
  # produces the same totals.
  # NOTE(review): this adds up every row of a country regardless of Type
  # (Emigrants/Immigrants) and Coverage -- confirm that mixing them is
  # intended.
  summarise(total = sum(total)) %>%
  # summarise() leaves the CntName grouping in place; remove it.
  ungroup() %>%
  arrange(year)
# Optional: restrict the table to a few countries, e.g.
#   tddata2 %>%
#     filter(CntName %in% c("United States of America", "Germany", "France"))

GGPlot

1990-1991: the USSR collapses and USSR migration explodes.

2008 Black Market, Market Crashes, Germany migration Declines

# Animated scatter of migration totals per country, one animation frame per
# year.
# library() instead of require(): a missing package should stop the script
# here rather than fail later with an obscure error.
library(plotly)

p <- ggplot(
  tddata2,
  aes(total / 1000000, year, color = reorder(CntName, -total))
) +
  # frame and ids are not ggplot2 aesthetics; they are passed through for
  # ggplotly() to build the animation (ggplot2 may warn about them).
  geom_point(aes(size = total, frame = year, ids = CntName)) +
  # NOTE(review): no fill aesthetic is mapped anywhere in this plot, so this
  # scale looks like a no-op -- consider removing it.
  scale_fill_continuous() +
  labs(color = "Country Names", x = "Migration by Millions", y = "Years")

p <- ggplotly(p) %>%
  animation_opts(2000, easing = "elastic", redraw = TRUE) %>%
  # Title drawn as an annotation in "paper" coordinates, just above the
  # top-left corner of the plot.
  add_annotations(
    yref = "paper",
    xref = "paper",
    y = 1.1,
    x = 0,
    text = "Migration By Year/Country",
    showarrow = FALSE,
    font = list(size = 17)
  ) %>%
  layout(title = FALSE)
p

Data3 posted by Ravi Itwaru (Airline Safety)

https://raw.githubusercontent.com/fivethirtyeight/data/master/airline-safety/airline-safety.csv

Get data 3

# FiveThirtyEight airline-safety table (raw CSV on GitHub).
theurl3 <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/airline-safety/airline-safety.csv"

# Parse the comma-separated file; its first row supplies the column names.
thedata3 <- read.table(theurl3, sep = ",", header = TRUE)

# Compact overview of every column and its type.
thedata3 %>% glimpse()
## Observations: 56
## Variables: 8
## $ airline                <fct> Aer Lingus, Aeroflot*, Aerolineas Argen...
## $ avail_seat_km_per_week <dbl> 320906734, 1197672318, 385803648, 59687...
## $ incidents_85_99        <int> 2, 76, 6, 3, 2, 14, 2, 3, 5, 7, 3, 21, ...
## $ fatal_accidents_85_99  <int> 0, 14, 0, 1, 0, 4, 1, 0, 0, 2, 1, 5, 0,...
## $ fatalities_85_99       <int> 0, 128, 0, 64, 0, 79, 329, 0, 0, 50, 1,...
## $ incidents_00_14        <int> 0, 6, 1, 5, 2, 6, 4, 5, 5, 4, 7, 17, 1,...
## $ fatal_accidents_00_14  <int> 0, 1, 0, 0, 0, 2, 1, 1, 1, 0, 0, 3, 0, ...
## $ fatalities_00_14       <int> 0, 88, 0, 0, 0, 337, 158, 7, 88, 0, 0, ...

Tidy Data by Airline Incident Counts

Gather all incident totals and display a GGPlot of the top 10 airlines by incidents, then compare incidents to available seat kilometers per week.

# Preview the first six rows of the airline-safety table.
head(thedata3, n = 6L)
##                 airline avail_seat_km_per_week incidents_85_99
## 1            Aer Lingus              320906734               2
## 2             Aeroflot*             1197672318              76
## 3 Aerolineas Argentinas              385803648               6
## 4           Aeromexico*              596871813               3
## 5            Air Canada             1865253802               2
## 6            Air France             3004002661              14
##   fatal_accidents_85_99 fatalities_85_99 incidents_00_14
## 1                     0                0               0
## 2                    14              128               6
## 3                     0                0               1
## 4                     1               64               5
## 5                     0                0               2
## 6                     4               79               6
##   fatal_accidents_00_14 fatalities_00_14
## 1                     0                0
## 2                     1               88
## 3                     0                0
## 4                     0                0
## 5                     0                0
## 6                     2              337
# Ten airlines with the highest combined count across the six safety
# columns, together with each airline's available seat kilometers per week.
tddata3 <- thedata3 %>%
  # Long format: one row per airline and safety column. Everything except
  # the airline name and its seat-km figure is a count column (previously
  # selected by the fragile positions 3:8).
  # NOTE(review): the six columns mix incidents, fatal accidents and
  # fatalities for two periods; confirm that adding them together is
  # intended.
  gather(incidents, count, -airline, -avail_seat_km_per_week) %>%
  mutate(count = as.integer(count)) %>%
  filter(count > 0) %>%
  group_by(airline) %>%
  summarise(
    count = sum(count),
    # avail_seat_km_per_week is a per-airline constant repeated on every
    # gathered row; summing it multiplied it by the number of non-zero
    # rows. Take the single value instead.
    avail_seat_km_per_week = first(avail_seat_km_per_week)
  ) %>%
  arrange(desc(count)) %>%
  # Rank explicitly by count: without wt, top_n() orders by the last column
  # of the table, which here is avail_seat_km_per_week, not the count.
  top_n(10, count)

GGPlot Top 10 Airline Incidents

# Bars of the combined count per airline, ordered from highest to lowest.
# The legend is hidden; the rotated axis labels name the airlines.
ggplot(tddata3, aes(x = reorder(airline, -count), y = count, fill = airline)) +
  geom_col(position = "identity", show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = -90))

# Combined count against available seat kilometers per week; colour
# identifies the airline and point size follows the count.
ggplot(tddata3, aes(x = avail_seat_km_per_week, y = count)) +
  geom_point(aes(color = airline, size = count))

#theme(axis.text.x = element_blank())